library(cluster)
library(broom)
library(glmnet)
library(modelr)
library(ggplot2)
library(cowplot)
library(mongolite)
library(ggmap)
library(dplyr)
library(sp)
library(lubridate)
library(tidyr)
library(reshape2)
library(stringr)
# barriosCategoricos = inner_join(mcuadrado,analisis_Barrios , by="barrio")%>%select(barrio, mCuadradoC, pVentasC)
# preciosModelo = inner_join(precios,barriosCategoricos , by="barrio")
# preciosModelo = inner_join(preciosModelo,productos , by=c("producto"="id"))
# preciosModelo = select (preciosModelo,-c(fecha,nombre,presentacion,producto))
# preciosModelo
#
#
# write.csv(preciosModelo,"/home/ignacio/datos/facultad/repos/tpEspecializacion/data/preciosModelo.csv", row.names = TRUE)
# Load the modelling table and drop the columns that are not used as
# regressors (X is presumably the row-name column written by write.csv).
preciosModelo <- read.csv(
  file = '/home/ignacio/datos/facultad/repos/tpEspecializacion/data/preciosModelo.csv'
) %>%
  select(-c(X, barrio, sucursal, sucursalTipo))

# Keep only a 30% random subsample of the rows as the working data set.
# NOTE(review): no RNG seed is set, so the sampled rows change between runs.
preciosModelo <- as_tibble(
  resample_partition(preciosModelo, c(train = 0.3, test = 0.7))$train
)

# Split the working data set 70/30 into training and test tibbles.
train_test <- resample_partition(preciosModelo, c(train = 0.7, test = 0.3))
precios_train <- as_tibble(train_test$train)
precios_test <- as_tibble(train_test$test)
# Candidate regressors:
# banderaDescripcion + medicion + barrio + banderaDescripcion + pVentasC + mCuadradoC

# Simple regressions of price on one predictor each.
lm_precio2bandera <- lm(precio ~ banderaDescripcion, data = preciosModelo)
lm_precio2medicion <- lm(precio ~ medicion, data = preciosModelo)
#lm_precio2barrio = lm(formula = precio~barrio, data=precios)

# Full summaries first, then the one-row fit statistics.
summary(lm_precio2bandera)
#coef(lm_precio2bandera)
summary(lm_precio2medicion)
glance(lm_precio2bandera)
glance(lm_precio2medicion)

# Multiple regression of price on store banner, measurement, the two
# categorical neighbourhood variables and brand.
# banderaDescripcion + sucursalTipo + medicion + pVentasC + mCuadradoC
lm_precioMultiple <- lm(
  precio ~ banderaDescripcion + medicion + pVentasC + mCuadradoC + marca,
  data = preciosModelo
)
# medicion - barrio - sucursalTipo - banderaDescripcion
summary(lm_precioMultiple)
Call:
lm(formula = precio ~ banderaDescripcion + medicion + pVentasC +
mCuadradoC + marca, data = preciosModelo)
Residuals:
Min 1Q Median 3Q Max
-219.91 -12.45 -1.09 8.85 368.69
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 198.29125 1.72366 115.041 < 2e-16 ***
banderaDescripcionDisco 4.60671 0.17144 26.871 < 2e-16 ***
banderaDescripcionExpress -3.20800 0.26391 -12.156 < 2e-16 ***
banderaDescripcionHipermercado Carrefour -4.65928 0.30709 -15.172 < 2e-16 ***
banderaDescripcionJOSIMAR SUPERMERCADOS -4.84252 0.77112 -6.280 3.39e-10 ***
banderaDescripcionJumbo 3.94146 0.31916 12.349 < 2e-16 ***
banderaDescripcionMarket -4.13624 0.16506 -25.060 < 2e-16 ***
banderaDescripcionMi Changomas -3.40764 0.94764 -3.596 0.000323 ***
banderaDescripcionSupermercados DIA -5.07030 0.25715 -19.717 < 2e-16 ***
banderaDescripcionVea 1.22593 0.24227 5.060 4.19e-07 ***
medicion 1.01076 0.01980 51.061 < 2e-16 ***
pVentasCbajo 0.30080 0.24994 1.204 0.228783
pVentasCmedio 0.03088 0.16613 0.186 0.852526
mCuadradoCbajo -0.10130 0.17685 -0.573 0.566771
mCuadradoCmedio -0.32164 0.13485 -2.385 0.017069 *
marca7 UP -137.89813 2.13015 -64.736 < 2e-16 ***
marca9 DE ORO -176.15951 1.90340 -92.550 < 2e-16 ***
marcaACTIMEL -56.10958 2.56182 -21.902 < 2e-16 ***
marcaADES -157.01825 1.95050 -80.502 < 2e-16 ***
marcaĆGUILA -122.27633 2.00493 -60.988 < 2e-16 ***
marcaAIMĆ -36.19008 2.13883 -16.920 < 2e-16 ***
marcaALA -86.13896 1.80748 -47.657 < 2e-16 ***
marcaALELUYA -122.94738 2.52903 -48.614 < 2e-16 ***
marcaALICANTE -159.60632 2.15442 -74.083 < 2e-16 ***
marcaALMA MORA -39.21732 2.11090 -18.578 < 2e-16 ***
marcaALWAYS -125.75331 2.54611 -49.390 < 2e-16 ***
marcaAMANDA -85.41617 2.52269 -33.859 < 2e-16 ***
marcaAMOR -131.68104 2.51658 -52.325 < 2e-16 ***
marcaAQUARIUS -153.17470 1.80415 -84.901 < 2e-16 ***
marcaARCOR -155.80267 1.75248 -88.904 < 2e-16 ***
marcaARIEL 81.35897 2.01726 40.332 < 2e-16 ***
marcaARLISTĆN -44.67356 2.12490 -21.024 < 2e-16 ***
marcaARMONĆA -180.77299 2.14961 -84.096 < 2e-16 ***
marcaARROCITAS -154.38957 2.15024 -71.801 < 2e-16 ***
marcaAXE -112.42388 1.84790 -60.839 < 2e-16 ***
marcaAYUDĆN -148.65242 1.84940 -80.379 < 2e-16 ***
marcaBAGLEY -135.94366 2.09560 -64.871 < 2e-16 ***
marcaBC -184.89126 1.94033 -95.288 < 2e-16 ***
marcaBENJAMĆN -60.54702 2.16285 -27.994 < 2e-16 ***
marcaBIMBO -139.66088 2.51049 -55.631 < 2e-16 ***
marcaBLEM -57.94081 1.93725 -29.909 < 2e-16 ***
marcaBODEGA TRAPICHE -114.10991 2.49483 -45.739 < 2e-16 ***
marcaBOLS -67.61167 2.52276 -26.801 < 2e-16 ***
marcaBON O BON -179.64268 2.41561 -74.367 < 2e-16 ***
marcaBONAFIDE -128.92564 2.53535 -50.851 < 2e-16 ***
marcaBONAQUA -168.96695 2.13885 -78.999 < 2e-16 ***
marcaBRAHMA -135.59944 2.45823 -55.161 < 2e-16 ***
marcaBRANCA 8.54327 2.10148 4.065 4.80e-05 ***
marcaBUDWEISER -126.91840 2.51453 -50.474 < 2e-16 ***
marcaBUTTER TOFFEES -143.31532 2.12909 -67.313 < 2e-16 ***
marcaCABRALES -73.28866 2.12961 -34.414 < 2e-16 ***
marcaCACHAMAI -161.68245 2.52071 -64.142 < 2e-16 ***
marcaCALLIA -84.20334 2.00989 -41.895 < 2e-16 ***
marcaCAMPARI 50.51196 2.50850 20.136 < 2e-16 ***
marcaCAĆUELAS -142.65323 2.00803 -71.041 < 2e-16 ***
marcaCAPITĆN MORGAN 102.39305 3.33682 30.686 < 2e-16 ***
marcaCAREFREE -60.18091 1.87353 -32.122 < 2e-16 ***
marcaCARIOCA -188.36207 2.52901 -74.481 < 2e-16 ***
marcaCARREFOUR -118.23961 4.05825 -29.136 < 2e-16 ***
marcaCASANCREM -124.71248 1.91978 -64.962 < 2e-16 ***
marcaCASANTO -168.56967 2.42498 -69.514 < 2e-16 ***
marcaCASTEL -102.23429 2.49491 -40.977 < 2e-16 ***
marcaCASTELL -157.07325 1.94522 -80.748 < 2e-16 ***
marcaCAT CHOW -127.55390 2.49290 -51.167 < 2e-16 ***
marcaCBSE -110.47936 1.99569 -55.359 < 2e-16 ***
marcaCELUSAL -160.93824 1.92997 -83.389 < 2e-16 ***
marcaCEPITA -167.91045 1.81950 -92.284 < 2e-16 ***
marcaCERAMICOL -120.77981 2.16286 -55.843 < 2e-16 ***
marcaCEREAL MIX -96.04001 1.93604 -49.606 < 2e-16 ***
marcaCEREALITAS -134.30273 2.13444 -62.922 < 2e-16 ***
marcaCHANDON 96.53849 2.15924 44.709 < 2e-16 ***
marcaCHOCOLINAS -161.08396 2.11630 -76.116 < 2e-16 ***
marcaCIF -146.03608 1.77885 -82.096 < 2e-16 ***
marcaCINDOR -139.09480 2.12910 -65.330 < 2e-16 ***
marcaCINZANO -68.77240 2.47241 -27.816 < 2e-16 ***
marcaCITRIC -134.54692 2.11981 -63.471 < 2e-16 ***
marcaCLIGHT -193.82313 1.80967 -107.104 < 2e-16 ***
marcaCOCA COLA -138.00163 1.83247 -75.309 < 2e-16 ***
marcaCOCINERO -39.01233 1.90576 -20.471 < 2e-16 ***
marcaCOLGATE -122.66165 1.80778 -67.852 < 2e-16 ***
marcaCOLON -103.46981 2.11987 -48.810 < 2e-16 ***
marcaCOMFORT -113.59488 2.43464 -46.658 < 2e-16 ***
marcaCOQUITAS -170.01757 2.49481 -68.148 < 2e-16 ***
marcaCORONA -136.36139 2.47973 -54.990 < 2e-16 ***
marcaCOTO -125.78605 3.90863 -32.182 < 2e-16 ***
marcaCRIOLLITAS -147.93901 1.93605 -76.413 < 2e-16 ***
marcaCRUSH -160.80440 2.13017 -75.489 < 2e-16 ***
marcaCRUZ DE MALTA -109.13862 2.13828 -51.040 < 2e-16 ***
marcaCUSENIER -78.36174 1.95090 -40.167 < 2e-16 ***
marcaCUTEX -142.90525 2.57105 -55.582 < 2e-16 ***
marcaDADA -42.10277 1.99790 -21.074 < 2e-16 ***
marcaDANETTE -155.47588 1.91992 -80.980 < 2e-16 ***
marcaDĆNICA -156.74175 2.49100 -62.923 < 2e-16 ***
marcaDANONINO -166.55299 1.92515 -86.514 < 2e-16 ***
marcaDĆA -148.22982 6.56156 -22.591 < 2e-16 ***
marcaDOG CHOW -22.62106 2.00883 -11.261 < 2e-16 ***
marcaDON DAVID 32.32714 2.52897 12.783 < 2e-16 ***
marcaDON SATUR -177.90658 1.99812 -89.037 < 2e-16 ***
marcaDON VICENTE -128.51714 1.93983 -66.252 < 2e-16 ***
marcaDOS ANCLAS -162.10630 1.79408 -90.356 < 2e-16 ***
marcaDOVE -118.41401 1.82868 -64.754 < 2e-16 ***
marcaDR LEMON -137.69946 2.15977 -63.756 < 2e-16 ***
marcaDRIVE 68.67541 2.58766 26.540 < 2e-16 ***
marcaECHO -142.14056 2.49289 -57.018 < 2e-16 ***
marcaECO DE LOS ANDES -174.09288 2.59746 -67.024 < 2e-16 ***
marcaELEMENTOS -52.51147 2.16848 -24.216 < 2e-16 ***
marcaELITE -143.92018 2.01646 -71.373 < 2e-16 ***
marcaESTANCIA MENDOZA -113.25004 2.11482 -53.551 < 2e-16 ***
marcaESTRELLA -124.42743 1.95253 -63.726 < 2e-16 ***
marcaETCHART -107.60114 2.51867 -42.721 < 2e-16 ***
marcaEXPRESS -146.87548 1.96714 -74.664 < 2e-16 ***
marcaEXQUISITA -160.16688 1.78223 -89.869 < 2e-16 ***
marcaFANTA -135.06308 1.89945 -71.106 < 2e-16 ***
marcaFAVORITA -174.94957 2.16163 -80.934 < 2e-16 ***
marcaFINCA EL PORTILLO -51.03729 2.00489 -25.456 < 2e-16 ***
marcaFINCA FLICHMAN -78.09996 2.53325 -30.830 < 2e-16 ***
marcaFINCA LAS MORAS -55.15308 2.13175 -25.872 < 2e-16 ***
marcaFINCA NATALINA -42.45296 2.51863 -16.856 < 2e-16 ***
marcaFINLANDIA -128.87324 1.84705 -69.772 < 2e-16 ***
marcaFOND DE CAVE 29.50987 2.49291 11.838 < 2e-16 ***
marcaFORMIS -138.27945 2.47614 -55.845 < 2e-16 ***
marcaFRIZEE -127.40379 2.11679 -60.187 < 2e-16 ***
marcaFRUTIGRAN -151.79374 2.48907 -60.984 < 2e-16 ***
marcaFUYĆ -152.73717 2.01341 -75.860 < 2e-16 ***
marcaGALLO -111.91774 1.93297 -57.899 < 2e-16 ***
marcaGALLO SNACKS -151.69041 2.43956 -62.179 < 2e-16 ***
marcaGANCIA -72.65155 2.14104 -33.933 < 2e-16 ***
marcaGATORADE -157.29202 1.84912 -85.063 < 2e-16 ***
marcaGENSER -86.08933 2.13994 -40.230 < 2e-16 ***
marcaGIACOMO -95.78526 2.01047 -47.643 < 2e-16 ***
marcaGILLETTE -63.45704 1.90251 -33.354 < 2e-16 ***
marcaGLACIAR -163.83023 2.12232 -77.194 < 2e-16 ***
marcaGOMES DA COSTA -103.38454 2.12085 -48.747 < 2e-16 ***
marcaGRANBY -145.54428 2.45826 -59.206 < 2e-16 ***
marcaGRANIX -151.62736 1.78494 -84.948 < 2e-16 ***
marcaGRANJA DEL SOL -95.07063 1.82919 -51.974 < 2e-16 ***
marcaGREEN HILLS -148.49983 1.99238 -74.534 < 2e-16 ***
marcaH2OH! -148.24895 2.01285 -73.651 < 2e-16 ***
marcaHARPIC -110.74843 2.11879 -52.270 < 2e-16 ***
marcaHEINEKEN -102.23365 2.42819 -42.103 < 2e-16 ***
marcaHELLMANN'S -151.34624 1.84201 -82.163 < 2e-16 ***
marcaHERBAL ESSENCES -113.96433 2.16044 -52.751 < 2e-16 ***
marcaHEREFORD -142.01379 2.10615 -67.428 < 2e-16 ***
marcaHIGIENOL -118.01849 1.95190 -60.463 < 2e-16 ***
marcaHILERET -125.01425 1.88701 -66.250 < 2e-16 ***
marcaHINDS -91.65300 2.53746 -36.120 < 2e-16 ***
marcaHIRAM WALKER -37.06543 2.49486 -14.857 < 2e-16 ***
marcaHOGAREĆAS -173.30418 2.51859 -68.810 < 2e-16 ***
marcaHUGGIES -58.68584 1.93997 -30.251 < 2e-16 ***
marcaIGUANA -145.84941 2.52894 -57.672 < 2e-16 ***
marcaIMPERIAL -123.19073 2.53536 -48.589 < 2e-16 ***
marcaISENBECK -146.54397 2.55056 -57.456 < 2e-16 ***
marcaJ&B 389.79078 2.47246 157.653 < 2e-16 ***
marcaJOHNSON'S -68.94229 2.02267 -34.085 < 2e-16 ***
marcaJORGITO -138.07653 2.14672 -64.320 < 2e-16 ***
marcaKELLOGGS -80.86496 2.44456 -33.080 < 2e-16 ***
marcaKESITAS -167.40015 2.13940 -78.246 < 2e-16 ***
marcaKILLKA 8.69983 2.50260 3.476 0.000508 ***
marcaKIN -156.34305 2.13230 -73.321 < 2e-16 ***
marcaKINDER -156.73171 2.19765 -71.318 < 2e-16 ***
marcaKNORR -163.47065 1.77837 -91.922 < 2e-16 ***
marcaKNORR QUICK -140.59278 2.15795 -65.151 < 2e-16 ***
marcaKOLYNOS -145.50512 2.48345 -58.590 < 2e-16 ***
marcaKOTEX -55.20017 1.90273 -29.011 < 2e-16 ***
marcaKRACHITOS -156.46095 1.93709 -80.771 < 2e-16 ***
marcaLA CAMPAGNOLA -138.64847 1.79305 -77.325 < 2e-16 ***
marcaLA MERCED -90.69662 2.48534 -36.493 < 2e-16 ***
marcaLA MORENITA -94.37262 2.15740 -43.744 < 2e-16 ***
marcaLA SALTEĆA -132.52001 1.89987 -69.752 < 2e-16 ***
marcaLA SERENĆSIMA -141.27571 1.75074 -80.695 < 2e-16 ***
marcaLA TRANQUERA -132.79727 2.01178 -66.010 < 2e-16 ***
marcaLA VIRGINIA -131.47425 1.86642 -70.442 < 2e-16 ***
marcaLACTAL -159.35071 2.13883 -74.504 < 2e-16 ***
marcaLATITUD 33 -40.81538 2.00995 -20.307 < 2e-16 ***
marcaLAYS -87.27208 2.44963 -35.627 < 2e-16 ***
marcaLEVITĆ -139.50420 1.83795 -75.902 < 2e-16 ***
marcaLINCOLN -173.16190 2.11831 -81.745 < 2e-16 ***
marcaLORD CHESELINE -124.46094 2.47605 -50.266 < 2e-16 ***
marcaLOS ĆRBOLES -65.96835 2.13121 -30.953 < 2e-16 ***
marcaLUCCHETTI -142.43240 1.77092 -80.428 < 2e-16 ***
marcaLYSOFORM -123.89572 1.84381 -67.195 < 2e-16 ***
marcaMAGGI -158.01542 2.13940 -73.860 < 2e-16 ***
marcaMAGISTRAL -130.95631 1.93397 -67.714 < 2e-16 ***
marcaMAIZENA -152.26310 2.15435 -70.677 < 2e-16 ***
marcaMANĆ -161.49117 2.00415 -80.578 < 2e-16 ***
marcaMARUCHAN -155.42111 2.18226 -71.220 < 2e-16 ***
marcaMATARAZZO -149.53859 1.78577 -83.739 < 2e-16 ***
marcaMAYOLIVA -164.35469 2.51866 -65.255 < 2e-16 ***
marcaMAZOLA -91.77531 2.49482 -36.786 < 2e-16 ***
marcaMC CAIN -89.00577 2.49102 -35.731 < 2e-16 ***
marcaMEDIA TARDE -165.29177 2.48531 -66.507 < 2e-16 ***
marcaMELBA -174.06801 2.47425 -70.352 < 2e-16 ***
marcaMELITAS -171.94959 2.55058 -67.416 < 2e-16 ***
marcaMELLIZAS -153.13601 2.14675 -71.334 < 2e-16 ***
marcaMENDICRIM -130.64518 2.14910 -60.791 < 2e-16 ***
marcaMENOYO -161.96846 1.92914 -83.959 < 2e-16 ***
marcaMERENGADAS -155.65945 2.12079 -73.397 < 2e-16 ***
marcaMICHEL TORINO -152.47342 2.13124 -71.542 < 2e-16 ***
marcaMILLER -103.51983 2.49677 -41.461 < 2e-16 ***
marcaMINERVA -124.18554 2.51451 -49.388 < 2e-16 ***
[ reached getOption("max.print") -- omitted 107 rows ]
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 33.49 on 343659 degrees of freedom
Multiple R-squared: 0.7404, Adjusted R-squared: 0.7402
F-statistic: 3203 on 306 and 343659 DF, p-value: < 2.2e-16
# Per-observation diagnostics of the multiple model (used below through the
# .resid, .std.resid and .fitted columns).
precioMultiple_resid <- lm_precioMultiple %>% augment()
precioMultiple_resid
# The mean of the residuals should be a number very close to zero.
mean(precioMultiple_resid[[".resid"]])
[1] -1.603159e-11
Como se puede apreciar, el promedio de todos los residuos es un número muy cercano a cero.
# Frequency polygon of the residuals of the multiple model.
# The residual column is mapped by name: writing `data$column` inside aes()
# bypasses ggplot2's data masking and is discouraged. The former `fill` label
# was dropped because no fill aesthetic is mapped in this plot.
ggplot(precioMultiple_resid, aes(x = .resid)) +
  geom_freqpoly(binwidth = 1.5) +
  labs(
    title = "Poligono de frecuencia de los residuos",
    x = "Residuo",
    y = "count"
  )

# Normal QQ plot of the standardised residuals; geom_abline() with no
# arguments draws the identity line used as the N(0, 1) reference.
ggplot(precioMultiple_resid, aes(sample = .std.resid)) +
  stat_qq() +
  geom_abline() +
  labs(
    title = "Normal QQ plot",
    x = "Valores teóricos",
    y = "Residuos estandarizados"
  )
Se quiere validar si los residuos estandarizados siguen la distribución teórica N(0,1). Como podemos ver, en los extremos los residuos tienden a alejarse de la distribución normal, por lo que se puede concluir que el modelo no está bien definido.
# Residuals against fitted values, with a zero reference line and a smoothed
# trend (no confidence band) to reveal any remaining structure.
ggplot(data = precioMultiple_resid, mapping = aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_hline(yintercept = 0) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Residuos versus el modelo ajustado",
    x = "valores fitted",
    y = "Residuos"
  )
Otro caso interesante para estudiar es si los residuos tienen o no una estructura definida. Lo que se observa es una clara estructura en el medio del gráfico; esto indica que hay una parte sistemática del fenómeno que se está perdiendo, es decir, que el modelo no está funcionando como se esperaría.
# banderaDescripcion + sucursalTipo + medicion + banderaDescripcion + pVentasC + mCuadradoC
# Copy of the working data with price and measurement on the natural-log scale.
preciosModelo_log <- preciosModelo %>%
  mutate(
    precio = log(precio),
    medicion = log(medicion)
  )

# Same specification as the multiple model, fitted on the log-transformed data.
lm_precioMultiple_log <- lm(
  precio ~ banderaDescripcion + medicion + pVentasC + mCuadradoC + marca,
  data = preciosModelo_log
)
summary(lm_precioMultiple_log)
Call:
lm(formula = precio ~ banderaDescripcion + medicion + pVentasC +
mCuadradoC + marca, data = preciosModelo_log)
Residuals:
Min 1Q Median 3Q Max
-2.30022 -0.15570 0.00231 0.15488 1.76680
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.2465752 0.0175428 299.073 < 2e-16 ***
banderaDescripcionDisco 0.0523972 0.0017439 30.045 < 2e-16 ***
banderaDescripcionExpress -0.0305207 0.0026844 -11.370 < 2e-16 ***
banderaDescripcionHipermercado Carrefour -0.0521540 0.0031237 -16.696 < 2e-16 ***
banderaDescripcionJOSIMAR SUPERMERCADOS -0.0433126 0.0078436 -5.522 3.35e-08 ***
banderaDescripcionJumbo 0.0422233 0.0032465 13.006 < 2e-16 ***
banderaDescripcionMarket -0.0493066 0.0016789 -29.369 < 2e-16 ***
banderaDescripcionMi Changomas -0.0300800 0.0096393 -3.121 0.001805 **
banderaDescripcionSupermercados DIA -0.0503217 0.0026157 -19.239 < 2e-16 ***
banderaDescripcionVea 0.0148221 0.0024643 6.015 1.80e-09 ***
medicion 0.0474396 0.0008331 56.943 < 2e-16 ***
pVentasCbajo 0.0020275 0.0025423 0.798 0.425152
pVentasCmedio -0.0004915 0.0016898 -0.291 0.771157
mCuadradoCbajo -0.0006376 0.0017988 -0.354 0.722996
mCuadradoCmedio -0.0032214 0.0013716 -2.349 0.018845 *
marca7 UP -1.1539994 0.0216674 -53.260 < 2e-16 ***
marca9 DE ORO -2.0062854 0.0193610 -103.625 < 2e-16 ***
marcaACTIMEL -0.3208587 0.0260582 -12.313 < 2e-16 ***
marcaADES -1.4803950 0.0198400 -74.617 < 2e-16 ***
marcaĆGUILA -0.9287971 0.0203937 -45.543 < 2e-16 ***
marcaAIMĆ -0.2046459 0.0217557 -9.407 < 2e-16 ***
marcaALA -0.8111670 0.0183853 -44.120 < 2e-16 ***
marcaALELUYA -0.9264400 0.0257247 -36.014 < 2e-16 ***
marcaALICANTE -1.5357676 0.0219142 -70.081 < 2e-16 ***
marcaALMA MORA -0.2237137 0.0214716 -10.419 < 2e-16 ***
marcaALWAYS -0.9632061 0.0258985 -37.192 < 2e-16 ***
marcaAMANDA -0.5447960 0.0256602 -21.231 < 2e-16 ***
marcaAMOR -1.0429921 0.0255981 -40.745 < 2e-16 ***
marcaAQUARIUS -1.4449102 0.0183514 -78.736 < 2e-16 ***
marcaARCOR -1.6392252 0.0178258 -91.958 < 2e-16 ***
marcaARIEL 0.1929478 0.0205190 9.403 < 2e-16 ***
marcaARLISTĆN -0.2608174 0.0216140 -12.067 < 2e-16 ***
marcaARMONĆA -2.2040146 0.0218654 -100.799 < 2e-16 ***
marcaARROCITAS -1.4308773 0.0218717 -65.421 < 2e-16 ***
marcaAXE -0.8085877 0.0187964 -43.018 < 2e-16 ***
marcaAYUDĆN -1.3480659 0.0188116 -71.661 < 2e-16 ***
marcaBAGLEY -1.1207625 0.0213159 -52.579 < 2e-16 ***
marcaBC -2.8589195 0.0197366 -144.854 < 2e-16 ***
marcaBENJAMĆN -0.3583170 0.0220001 -16.287 < 2e-16 ***
marcaBIMBO -1.1622996 0.0255361 -45.516 < 2e-16 ***
marcaBLEM -0.4158622 0.0197053 -21.104 < 2e-16 ***
marcaBODEGA TRAPICHE -0.8225229 0.0253768 -32.412 < 2e-16 ***
marcaBOLS -0.4038059 0.0256610 -15.736 < 2e-16 ***
marcaBON O BON -2.1546456 0.0245710 -87.690 < 2e-16 ***
marcaBONAFIDE -1.0024718 0.0257890 -38.872 < 2e-16 ***
marcaBONAQUA -1.7809555 0.0217559 -81.861 < 2e-16 ***
marcaBRAHMA -1.0968801 0.0250045 -43.867 < 2e-16 ***
marcaBRANCA 0.0119388 0.0213758 0.559 0.576489
marcaBUDWEISER -0.9792220 0.0255773 -38.285 < 2e-16 ***
marcaBUTTER TOFFEES -1.2250475 0.0216566 -56.567 < 2e-16 ***
marcaCABRALES -0.5076868 0.0216619 -23.437 < 2e-16 ***
marcaCACHAMAI -1.5796366 0.0256400 -61.608 < 2e-16 ***
marcaCALLIA -0.5357679 0.0204441 -26.206 < 2e-16 ***
marcaCAMPARI 0.2246875 0.0255159 8.806 < 2e-16 ***
marcaCAĆUELAS -1.3283866 0.0204252 -65.037 < 2e-16 ***
marcaCAPITĆN MORGAN 0.4247435 0.0339414 12.514 < 2e-16 ***
marcaCAREFREE -0.4960337 0.0190571 -26.029 < 2e-16 ***
marcaCARIOCA -2.5974681 0.0257245 -100.973 < 2e-16 ***
marcaCARREFOUR -0.8688628 0.0412796 -21.048 < 2e-16 ***
marcaCASANCREM -0.9509495 0.0195276 -48.698 < 2e-16 ***
marcaCASANTO -1.7616566 0.0246664 -71.419 < 2e-16 ***
marcaCASTEL -0.6966323 0.0253776 -27.451 < 2e-16 ***
marcaCASTELL -1.5109279 0.0197863 -76.362 < 2e-16 ***
marcaCAT CHOW -0.9867196 0.0253572 -38.913 < 2e-16 ***
marcaCBSE -0.8272600 0.0202997 -40.752 < 2e-16 ***
marcaCELUSAL -1.6854197 0.0196312 -85.854 < 2e-16 ***
marcaCEPITA -1.8663587 0.0185075 -100.843 < 2e-16 ***
marcaCERAMICOL -0.9147630 0.0220002 -41.580 < 2e-16 ***
marcaCEREAL MIX -0.6462563 0.0196930 -32.817 < 2e-16 ***
marcaCEREALITAS -1.1968618 0.0217111 -55.127 < 2e-16 ***
marcaCHANDON 0.3903256 0.0219633 17.772 < 2e-16 ***
marcaCHOCOLINAS -1.5875778 0.0215265 -73.750 < 2e-16 ***
marcaCIF -1.3026681 0.0180941 -71.994 < 2e-16 ***
marcaCINDOR -1.2006211 0.0216567 -55.439 < 2e-16 ***
marcaCINZANO -0.4151771 0.0251488 -16.509 < 2e-16 ***
marcaCITRIC -1.1201624 0.0215622 -51.950 < 2e-16 ***
marcaCLIGHT -3.0738741 0.0184076 -166.990 < 2e-16 ***
marcaCOCA COLA -1.2328310 0.0186394 -66.141 < 2e-16 ***
marcaCOCINERO -0.4546462 0.0193850 -23.454 < 2e-16 ***
marcaCOLGATE -0.9793684 0.0183884 -53.260 < 2e-16 ***
marcaCOLON -0.7169816 0.0215628 -33.251 < 2e-16 ***
marcaCOMFORT -0.8186383 0.0247646 -33.057 < 2e-16 ***
marcaCOQUITAS -1.8188622 0.0253766 -71.675 < 2e-16 ***
marcaCORONA -1.1115120 0.0252233 -44.067 < 2e-16 ***
marcaCOTO -0.9610497 0.0397577 -24.173 < 2e-16 ***
marcaCRIOLLITAS -1.3127722 0.0196931 -66.662 < 2e-16 ***
marcaCRUSH -1.5708083 0.0216676 -72.496 < 2e-16 ***
marcaCRUZ DE MALTA -0.8240624 0.0217501 -37.888 < 2e-16 ***
marcaCUSENIER -0.4897582 0.0198441 -24.680 < 2e-16 ***
marcaCUTEX -1.2117025 0.0261522 -46.333 < 2e-16 ***
marcaDADA -0.2399010 0.0203221 -11.805 < 2e-16 ***
marcaDANETTE -1.4518986 0.0195290 -74.346 < 2e-16 ***
marcaDĆNICA -1.4766242 0.0253378 -58.277 < 2e-16 ***
marcaDANONINO -1.7118613 0.0195822 -87.419 < 2e-16 ***
marcaDĆA -1.3482305 0.0667427 -20.200 < 2e-16 ***
marcaDOG CHOW -0.1573592 0.0204333 -7.701 1.35e-14 ***
marcaDON DAVID 0.1436199 0.0257241 5.583 2.36e-08 ***
marcaDON SATUR -2.0849374 0.0203244 -102.583 < 2e-16 ***
marcaDON VICENTE -0.9998907 0.0197315 -50.675 < 2e-16 ***
marcaDOS ANCLAS -1.6911314 0.0182489 -92.670 < 2e-16 ***
marcaDOVE -0.9488375 0.0186009 -51.010 < 2e-16 ***
marcaDR LEMON -1.1871182 0.0219687 -54.037 < 2e-16 ***
marcaDRIVE 0.2889805 0.0263210 10.979 < 2e-16 ***
marcaECHO -1.2092731 0.0253570 -47.690 < 2e-16 ***
marcaECO DE LOS ANDES -1.9357083 0.0264207 -73.265 < 2e-16 ***
marcaELEMENTOS -0.3058845 0.0220573 -13.868 < 2e-16 ***
marcaELITE -1.3233980 0.0205110 -64.521 < 2e-16 ***
marcaESTANCIA MENDOZA -0.8198669 0.0215114 -38.113 < 2e-16 ***
marcaESTRELLA -1.0696994 0.0198607 -53.860 < 2e-16 ***
marcaETCHART -0.7583687 0.0256193 -29.601 < 2e-16 ***
marcaEXPRESS -1.3592585 0.0200093 -67.931 < 2e-16 ***
marcaEXQUISITA -1.6589731 0.0181284 -91.512 < 2e-16 ***
marcaFANTA -1.1707195 0.0193208 -60.594 < 2e-16 ***
marcaFAVORITA -1.9739965 0.0219877 -89.778 < 2e-16 ***
marcaFINCA EL PORTILLO -0.2915730 0.0203933 -14.297 < 2e-16 ***
marcaFINCA FLICHMAN -0.4942011 0.0257676 -19.179 < 2e-16 ***
marcaFINCA LAS MORAS -0.3298260 0.0216837 -15.211 < 2e-16 ***
marcaFINCA NATALINA -0.2376413 0.0256190 -9.276 < 2e-16 ***
marcaFINLANDIA -1.0233920 0.0187878 -54.471 < 2e-16 ***
marcaFOND DE CAVE 0.1273756 0.0253573 5.023 5.08e-07 ***
marcaFORMIS -1.1430363 0.0251868 -45.382 < 2e-16 ***
marcaFRIZEE -1.0029614 0.0215315 -46.581 < 2e-16 ***
marcaFRUTIGRAN -1.3750510 0.0253183 -54.311 < 2e-16 ***
marcaFUYĆ -1.4412207 0.0204799 -70.372 < 2e-16 ***
marcaGALLO -0.8046299 0.0196617 -40.924 < 2e-16 ***
marcaGALLO SNACKS -1.3713931 0.0248147 -55.265 < 2e-16 ***
marcaGANCIA -0.4767364 0.0217782 -21.891 < 2e-16 ***
marcaGATORADE -1.5216712 0.0188088 -80.902 < 2e-16 ***
marcaGENSER -0.6092270 0.0217670 -27.989 < 2e-16 ***
marcaGIACOMO -0.6433325 0.0204501 -31.459 < 2e-16 ***
marcaGILLETTE -0.3964803 0.0193519 -20.488 < 2e-16 ***
marcaGLACIAR -1.6427719 0.0215878 -76.097 < 2e-16 ***
marcaGOMES DA COSTA -0.7105046 0.0215728 -32.935 < 2e-16 ***
marcaGRANBY -1.2620609 0.0250049 -50.473 < 2e-16 ***
marcaGRANIX -1.4055022 0.0181559 -77.413 < 2e-16 ***
marcaGRANJA DEL SOL -0.6562000 0.0186061 -35.268 < 2e-16 ***
marcaGREEN HILLS -1.3507273 0.0202660 -66.650 < 2e-16 ***
marcaH2OH! -1.3189164 0.0204742 -64.418 < 2e-16 ***
marcaHARPIC -0.7875300 0.0215519 -36.541 < 2e-16 ***
marcaHEINEKEN -0.6992505 0.0246989 -28.311 < 2e-16 ***
marcaHELLMANN'S -1.4147374 0.0187365 -75.507 < 2e-16 ***
marcaHERBAL ESSENCES -0.8212641 0.0219754 -37.372 < 2e-16 ***
marcaHEREFORD -1.2025251 0.0214233 -56.132 < 2e-16 ***
marcaHIGIENOL -0.9166445 0.0198543 -46.169 < 2e-16 ***
marcaHILERET -0.9793820 0.0191942 -51.025 < 2e-16 ***
marcaHINDS -0.5988580 0.0258104 -23.202 < 2e-16 ***
marcaHIRAM WALKER -0.2030948 0.0253772 -8.003 1.22e-15 ***
marcaHOGAREĆAS -1.9132810 0.0256185 -74.684 < 2e-16 ***
marcaHUGGIES -0.4164240 0.0197329 -21.103 < 2e-16 ***
marcaIGUANA -1.2625556 0.0257238 -49.081 < 2e-16 ***
marcaIMPERIAL -0.9303262 0.0257891 -36.074 < 2e-16 ***
marcaISENBECK -1.2727263 0.0259437 -49.057 < 2e-16 ***
marcaJ&B 1.0652070 0.0251493 42.355 < 2e-16 ***
marcaJOHNSON'S -0.4638200 0.0205741 -22.544 < 2e-16 ***
marcaJORGITO -1.1420522 0.0218359 -52.302 < 2e-16 ***
marcaKELLOGGS -0.5234931 0.0248655 -21.053 < 2e-16 ***
marcaKESITAS -1.7563075 0.0217615 -80.707 < 2e-16 ***
marcaKILLKA 0.0371854 0.0254558 1.461 0.144077
marcaKIN -1.6452476 0.0216893 -75.855 < 2e-16 ***
marcaKINDER -1.4916719 0.0223539 -66.730 < 2e-16 ***
marcaKNORR -1.6417419 0.0180892 -90.758 < 2e-16 ***
marcaKNORR QUICK -1.1793244 0.0219502 -53.727 < 2e-16 ***
marcaKOLYNOS -1.2609304 0.0252611 -49.916 < 2e-16 ***
marcaKOTEX -0.3645838 0.0193541 -18.838 < 2e-16 ***
marcaKRACHITOS -1.5809479 0.0197036 -80.236 < 2e-16 ***
marcaLA CAMPAGNOLA -1.1950844 0.0182385 -65.525 < 2e-16 ***
marcaLA MERCED -0.5910471 0.0252803 -23.380 < 2e-16 ***
marcaLA MORENITA -0.6828839 0.0219445 -31.119 < 2e-16 ***
marcaLA SALTEĆA -1.0657972 0.0193251 -55.151 < 2e-16 ***
marcaLA SERENĆSIMA -1.2968376 0.0178081 -72.823 < 2e-16 ***
marcaLA TRANQUERA -1.1128884 0.0204633 -54.384 < 2e-16 ***
marcaLA VIRGINIA -1.2125046 0.0189847 -63.867 < 2e-16 ***
marcaLACTAL -1.5388014 0.0217556 -70.731 < 2e-16 ***
marcaLATITUD 33 -0.2233877 0.0204447 -10.926 < 2e-16 ***
marcaLAYS -0.5644608 0.0249171 -22.654 < 2e-16 ***
marcaLEVITĆ -1.1683490 0.0186952 -62.495 < 2e-16 ***
marcaLINCOLN -1.9149601 0.0215469 -88.874 < 2e-16 ***
marcaLORD CHESELINE -0.9517082 0.0251858 -37.787 < 2e-16 ***
marcaLOS ĆRBOLES -0.4062372 0.0216782 -18.739 < 2e-16 ***
marcaLUCCHETTI -1.2841314 0.0180134 -71.287 < 2e-16 ***
marcaLYSOFORM -0.9607698 0.0187548 -51.228 < 2e-16 ***
marcaMAGGI -1.5044012 0.0217615 -69.131 < 2e-16 ***
marcaMAGISTRAL -1.0694373 0.0196719 -54.364 < 2e-16 ***
marcaMAIZENA -1.4584429 0.0219135 -66.554 < 2e-16 ***
marcaMANĆ -1.6366391 0.0203857 -80.284 < 2e-16 ***
marcaMARUCHAN -1.4961990 0.0221975 -67.404 < 2e-16 ***
marcaMATARAZZO -1.3365896 0.0181644 -73.583 < 2e-16 ***
marcaMAYOLIVA -1.6437218 0.0256192 -64.160 < 2e-16 ***
marcaMAZOLA -0.6088143 0.0253768 -23.991 < 2e-16 ***
marcaMC CAIN -0.5755904 0.0253380 -22.716 < 2e-16 ***
marcaMEDIA TARDE -1.6740623 0.0252800 -66.221 < 2e-16 ***
marcaMELBA -1.9384968 0.0251675 -77.024 < 2e-16 ***
marcaMELITAS -1.8572739 0.0259440 -71.588 < 2e-16 ***
marcaMELLIZAS -1.5108915 0.0218362 -69.192 < 2e-16 ***
marcaMENDICRIM -1.0289421 0.0218601 -47.069 < 2e-16 ***
marcaMENOYO -1.6207751 0.0196227 -82.597 < 2e-16 ***
marcaMERENGADAS -1.5769607 0.0215722 -73.101 < 2e-16 ***
marcaMICHEL TORINO -1.3937570 0.0216784 -64.292 < 2e-16 ***
marcaMILLER -0.7113353 0.0253966 -28.009 < 2e-16 ***
marcaMINERVA -0.9413224 0.0255770 -36.803 < 2e-16 ***
[ reached getOption("max.print") -- omitted 107 rows ]
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3407 on 343659 degrees of freedom
Multiple R-squared: 0.7687, Adjusted R-squared: 0.7684
F-statistic: 3731 on 306 and 343659 DF, p-value: < 2.2e-16
# Per-observation diagnostics of the log model, followed by the mean residual.
lm_precioMultiple_log_resid <- lm_precioMultiple_log %>% augment()
lm_precioMultiple_log_resid
mean(lm_precioMultiple_log_resid[[".resid"]])
[1] 1.234472e-12
# Frequency polygon of the residuals of the log model.
# The bin width is 0.05 instead of the 2.5 copied from the raw-price plot:
# these residuals are on the log scale (roughly -2.3 to 1.8 in the summary),
# so a 2.5-wide bin would collapse the whole distribution into one or two bins.
# The residual column is mapped by name rather than via `data$column`, and the
# stale `fill` label (no fill aesthetic is mapped) was removed.
ggplot(lm_precioMultiple_log_resid, aes(x = .resid)) +
  geom_freqpoly(binwidth = 0.05) +
  labs(
    title = "Poligono de frecuencia de los residuos",
    x = "Residuo",
    y = "count"
  )

# Normal QQ plot of the standardised residuals against the identity line.
ggplot(lm_precioMultiple_log_resid, aes(sample = .std.resid)) +
  stat_qq() +
  geom_abline() +
  labs(
    title = "Normal QQ plot log",
    x = "Valores teóricos",
    y = "Residuos estandarizados"
  )
Lo que se observa en este gráfico es que, si bien en los extremos la tendencia es alejarse de la recta, los valores están mucho más pegados a ella que en el modelo anterior; lo mismo ocurre con los valores intermedios, que están prácticamente sobre la recta. Por lo antes explicado, este modelo está mejor definido que el anterior.
# Residuals against fitted values for the log model, with a zero reference
# line and a smoothed trend (no confidence band).
ggplot(
  data = lm_precioMultiple_log_resid,
  mapping = aes(x = .fitted, y = .resid)
) +
  geom_point() +
  geom_hline(yintercept = 0) +
  geom_smooth(se = FALSE) +
  labs(
    title = "Residuos versus el modelo ajustado log",
    x = "valores fitted",
    y = "Residuos"
  )
Si bien en este caso la diferencia no es tan notoria como en el análisis anterior, se puede apreciar que los residuos no forman una figura tan concentrada como en el caso no logarítmico, lo que representa una mejora del modelo. Repasando el artículo sobre la aplicación de logaritmos para el estudio, este nuevo modelo con logaritmos podría considerarse un híbrido entre un modelo log-nivel para las covariables que no se modificaron y un modelo log-log para aquellas que sí lo fueron.
# Coefficient tables (estimate plus confidence interval) for both models.
lineal_coef <- lm_precioMultiple %>% tidy(conf.int = TRUE)
lineal_coef_log <- lm_precioMultiple_log %>% tidy(conf.int = TRUE)

# Point estimate and confidence interval per term, raw-price model.
# geom_pointrange() already draws the point, so the separate geom_point()
# layer of the original was redundant and has been removed.
ggplot(lineal_coef, aes(term, estimate)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  labs(
    title = "Coeficientes de la regresion lineal",
    x = "",
    y = "Estimacion e Int. Confianza"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))

# Same plot for the log model. The title now carries the "log" suffix used by
# the other log-model plots; previously both plots had identical titles and
# could not be told apart.
ggplot(lineal_coef_log, aes(term, estimate)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
  labs(
    title = "Coeficientes de la regresion lineal log",
    x = "",
    y = "Estimacion e Int. Confianza"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))
# P-value of every term of the raw-price model, ordered by decreasing
# p-value, with a horizontal reference line at 0.05.
ggplot(
  lineal_coef,
  aes(x = reorder(term, -p.value), y = p.value, fill = p.value)
) +
  geom_col() +
  geom_hline(yintercept = 0.05) +
  labs(
    title = "P-valor de los regresores para multiple",
    x = "",
    y = "P-valor"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(
    low = "forestgreen",
    mid = "yellow2",
    high = "firebrick",
    midpoint = 0.5
  )

# Same chart for the log model.
ggplot(
  lineal_coef_log,
  aes(x = reorder(term, -p.value), y = p.value, fill = p.value)
) +
  geom_col() +
  geom_hline(yintercept = 0.05) +
  labs(
    title = "P-valor de los regresores para multiple log",
    x = "",
    y = "P-valor"
  ) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(
    low = "forestgreen",
    mid = "yellow2",
    high = "firebrick",
    midpoint = 0.5
  )
# One-row fit summaries (R², adjusted R², overall p-value) of each model,
# stacked and labelled so the two fits can be compared side by side.
multiple <- select(glance(lm_precioMultiple), r.squared, adj.r.squared, p.value)
multiple_log <- select(
  glance(lm_precioMultiple_log),
  r.squared, adj.r.squared, p.value
)
bind_rows(multiple, multiple_log) %>%
  mutate(modelo = c("multiple", "multiple_log"))
NA
# Response vector: the log-transformed price.
#prod_precios = precios_train$precio
# Design matrix of the regressors.
#prod_mtx = model.matrix(precio~ banderaDescripcion + sucursalTipo + medicion + pVentasC + mCuadradoC, data = precios_train)
prod_precios <- preciosModelo_log[["precio"]]
prod_mtx <- model.matrix(
  precio ~ banderaDescripcion + medicion + pVentasC + mCuadradoC + marca,
  data = preciosModelo_log
)

# Lasso model over glmnet's default lambda sequence.
# NOTE(review): this path is fitted with standardize = FALSE, whereas the
# cross-validated and final lasso fits further down use standardize = TRUE.
lasso.mod <- glmnet(
  x = prod_mtx,        # regressor matrix
  y = prod_precios,    # response to predict
  alpha = 1,           # regularisation type selector (lasso here)
  standardize = FALSE
)
lasso_coef <- tidy(lasso.mod)
lasso_coef
NA
# Built-in glmnet coefficient-path plots: against lambda, then with the
# default x axis.
plot(lasso.mod, xvar = "lambda")
plot(lasso.mod)

# The same coefficient paths drawn with ggplot2, with and without the
# intercept term.
g1 <- ggplot(
  lasso_coef,
  aes(log(lambda), estimate, group = term, color = term)
) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Lasso con Intercepto", y = "Coeficientes")
g2 <- lasso_coef %>%
  filter(term != "(Intercept)") %>%
  ggplot(aes(log(lambda), estimate, group = term, color = term)) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Lasso sin Intercepto", y = "Coeficientes")
plot_grid(g1, g2)

# Cross-validated lasso with standardised predictors.
lasso_cv <- cv.glmnet(
  x = prod_mtx,
  y = prod_precios,
  alpha = 1,
  standardize = TRUE
)
lasso_cv
Call: cv.glmnet(x = prod_mtx, y = prod_precios, alpha = 1, standardize = T)
Measure: Mean-Squared Error
Lambda Measure SE Nonzero
min 2.039e-05 0.1166 0.0004265 306
1se 8.232e-05 0.1170 0.0004295 305
# Cross-validation curve of the lasso fit.
plot(x = lasso_cv)
El gráfico nos muestra la media del MSE con su límite superior e inferior y la cantidad de variables que sobreviven para cada valor de lambda.
# Cross-validation results as a data frame.
tidy(lasso_cv)
NA
# Minimum-error lambda and the lambda one standard error away from it.
glance(lasso_cv)
NA
# Pick the lambda with the lowest cross-validated error.
lasso_lambda_opt <- lasso_cv[["lambda.min"]]

# Refit the lasso at that single lambda, standardising the predictors.
lasso_opt <- glmnet(
  x = prod_mtx,        # regressor matrix
  y = prod_precios,    # response to predict
  alpha = 1,           # regularisation type selector (lasso here)
  standardize = TRUE,
  lambda = lasso_lambda_opt
)
# Standard output
#lasso_opt
# Tidy coefficient table of the refitted model.
tidy(lasso_opt)
NA
Las variables explican el 76 % del deviance.
# Ridge model over glmnet's default lambda sequence, predictors standardised.
ridge.mod <- glmnet(
  x = prod_mtx,        # regressor matrix
  y = prod_precios,    # response to predict
  alpha = 0,           # regularisation type selector (ridge here)
  standardize = TRUE
)
# Tidy coefficient table along the path.
ridge_coef <- tidy(ridge.mod)
ridge_coef
NA
# Built-in glmnet coefficient-path plots for the ridge fit: against lambda,
# then with the default x axis.
plot(ridge.mod, xvar = "lambda")
plot(ridge.mod)
NA
NA
# Ridge coefficient paths drawn with ggplot2, with and without the intercept.
g1 <- ggplot(
  ridge_coef,
  aes(log(lambda), estimate, group = term, color = term)
) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Ridge con Intercepto", y = "Coeficientes")
g2 <- ridge_coef %>%
  filter(term != "(Intercept)") %>%
  ggplot(aes(log(lambda), estimate, group = term, color = term)) +
  geom_line() +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = "Ridge sin Intercepto", y = "Coeficientes")
plot_grid(g1, g2)

# Cross-validated ridge with standardised predictors, and its CV curve.
ridge_cv <- cv.glmnet(
  x = prod_mtx,
  y = prod_precios,
  alpha = 0,
  standardize = TRUE
)
plot(ridge_cv)
# Pick the lambda with the lowest cross-validated error.
ridge_lambda_opt <- ridge_cv[["lambda.min"]]

# Refit the ridge model at that single lambda, standardising the predictors.
ridge_opt <- glmnet(
  x = prod_mtx,        # regressor matrix
  y = prod_precios,    # response to predict
  alpha = 0,           # regularisation type selector (ridge here)
  standardize = TRUE,
  lambda = ridge_lambda_opt
)
# Standard output
#ridge_opt
tidy(ridge_opt)
NA
# Deviance ratio along the ridge path; the vertical line marks the
# cross-validated optimal lambda.
ridge_dev <- ridge_coef %>%
  select(lambda, dev.ratio) %>%
  distinct() %>%
  ggplot(aes(log(lambda), dev.ratio)) +
  geom_point() +
  geom_line() +
  geom_vline(
    xintercept = log(ridge_lambda_opt),
    color = "steelblue",
    size = 1.5
  ) +
  labs(title = "Ridge: Deviance") +
  theme_bw()

# Same plot for the lasso path.
lasso_dev <- lasso_coef %>%
  select(lambda, dev.ratio) %>%
  distinct() %>%
  ggplot(aes(log(lambda), dev.ratio)) +
  geom_point() +
  geom_line() +
  geom_vline(
    xintercept = log(lasso_lambda_opt),
    color = "firebrick",
    size = 1.5
  ) +
  labs(title = "Lasso: Deviance") +
  theme_bw()

# Both deviance plots side by side.
plot_grid(ridge_dev, lasso_dev)
NA
NA
Comparación de la relación entre el porcentaje de deviance explicada y lambda para los tres tipos de modelos que realizamos
#' Compute RMSE and R-squared for a set of predictions
#'
#' @param true Numeric vector of observed values.
#' @param predicted Predicted values: a numeric vector or a one-column matrix
#'   (such as the result of `predict()` on a glmnet fit), either of the same
#'   length as `true` or a single value to be recycled.
#' @param df Optional data frame whose row count is used as the number of
#'   observations in the RMSE. When omitted, `length(true)` is used instead.
#' @return A one-row data frame with columns `RMSE` and `Rsquare`.
eval_results <- function(true, predicted, df = NULL) {
  # Flatten matrix predictions so the arithmetic below is plain vector math.
  predicted <- as.numeric(predicted)
  if (!(length(predicted) %in% c(1L, length(true)))) {
    stop(
      "`predicted` must have length 1 or the same length as `true`.",
      call. = FALSE
    )
  }

  # Keep the original behaviour when `df` is supplied; otherwise derive the
  # number of observations from the response itself.
  n_obs <- if (is.null(df)) length(true) else nrow(df)

  SSE <- sum((predicted - true)^2)
  SST <- sum((true - mean(true))^2)
  R_square <- 1 - SSE / SST
  RMSE <- sqrt(SSE / n_obs)

  # Model performance metrics
  data.frame(
    RMSE = RMSE,
    Rsquare = R_square
  )
}
# Prediction and evaluation on the training data (lasso).
# lasso_opt was fitted on prod_precios, i.e. the log-transformed price, so its
# predictions are on the log scale. They are therefore compared against
# prod_precios; comparing them with the raw preciosModelo$precio, as before,
# mixes scales and yields meaningless RMSE and R-squared values.
predictions_train <- predict(lasso_opt, s = lasso_lambda_opt, newx = prod_mtx)
eval_results(prod_precios, predictions_train, preciosModelo_log)
NA
NA
# Prediction and evaluation on the training data (ridge).
# ridge_opt was fitted on prod_precios, i.e. the log-transformed price, so its
# predictions are on the log scale. They are therefore compared against
# prod_precios; comparing them with the raw preciosModelo$precio, as before,
# mixes scales and yields meaningless RMSE and R-squared values.
predictions_train <- predict(ridge_opt, s = ridge_lambda_opt, newx = prod_mtx)
eval_results(prod_precios, predictions_train, preciosModelo_log)
NA